import numpy as np
import pandas as pd
import os
import requests
import pprint
import matplotlib.pyplot as plt
import requests
import operator
from datetime import datetime
from dateutil import tz
import operator
from collections import OrderedDict
from math import isnan
import json
import re
from IPython.display import Image
def wordcloud_plot(text, colour, stopwords=None):
    """Render a word cloud for ``text`` and show it with matplotlib.

    Args:
        text: Source material for the cloud. It is passed through ``str()``
            before generation, so a list of strings is rendered from its
            string representation.
        colour: Background colour handed to ``WordCloud``.
        stopwords: Optional collection of words to leave out of the cloud.
            When omitted, the library's ``STOPWORDS`` set is used, which is
            what this function always used before the parameter existed.

    Returns:
        Whatever ``plt.show()`` returns.
    """
    if stopwords is None:
        stopwords = STOPWORDS
    cloud = WordCloud(
        width=3000,
        height=2000,
        background_color=colour,
        stopwords=stopwords,
    ).generate(str(text))
    # The figure is created only so that it becomes the current figure; the
    # handle itself is not needed afterwards.
    plt.figure(figsize=(40, 30), facecolor='k', edgecolor='k')
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis('off')
    plt.tight_layout(pad=0)
    return plt.show()
# Alternatives tried at each position, in this order: an @-mention, any single
# character that is not an ASCII letter, digit, space or tab, or a
# scheme://url token. Compiled once so repeated calls reuse the pattern.
_TWEET_NOISE = re.compile(r"(@[A-Za-z0-9_]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")


def clean_tweet(tweet):
    """Strip mentions, URLs and non-alphanumeric characters from ``tweet``.

    Every match of the noise pattern is replaced by a space, then runs of
    whitespace are collapsed to single spaces and the ends are trimmed.

    The mention pattern includes ``_`` so that a handle such as
    ``@some_user`` is removed as a whole; previously the match stopped at the
    underscore and the remainder of the handle leaked into the result.

    Args:
        tweet: The raw text to clean.

    Returns:
        The cleaned text as a single space-separated string.
    """
    return ' '.join(_TWEET_NOISE.sub(" ", tweet).split())
# Load incindia.json from the current working directory.
path = os.getcwd()
path += '/incindia.json'
# JSON text is UTF-8; without an explicit encoding open() falls back to the
# platform locale and can fail on, or garble, non-ASCII text.
with open(path, 'r', encoding='utf-8') as f:
    # Read the whole file in one call instead of concatenating line by line.
    tweets = f.read()
data = json.loads(tweets)
# Track the post with the highest like count and, separately, the post with
# the highest comment count. The comparisons are strict, so on a tie the
# first post encountered is kept.
mostLiked = mostComment = 0
likes = comment = 0
likeurl = commenturl = ""
for post in data['GraphImages']:
    like_total = post['edge_media_preview_like']['count']
    if like_total > likes:
        mostLiked, likeurl, likes = post['id'], post['shortcode'], like_total
    comment_total = post['edge_media_to_comment']['count']
    if comment_total > comment:
        mostComment, commenturl, comment = post['id'], post['shortcode'], comment_total

# Report the most-liked post, then the most-commented one. The bare
# expressions are notebook-style display statements.
print("Post has the most likes' ID is: ",mostLiked)
print("Total Likes: ",likes)
likeurl
Image("screenshot/insta-inc1.png")

print("Post has the most comments' ID is: ",mostComment)
print("Total Comments: ",comment)
commenturl
Image("screenshot/insta-inc2.png")
# One row per post: like count, comment count and numeric post id.
rows = [
    [
        int(post['edge_media_preview_like']['count']),
        int(post['edge_media_to_comment']['count']),
        int(post['id']),
    ]
    for post in data['GraphImages']
]
df = pd.DataFrame(rows, columns=['likes', 'comments', 'id'])
# Highest like count first; comment count (also descending) breaks ties.
newdf = df.sort_values(by=['likes', 'comments'], ascending=False)
newdf.head()
# Count how many times each hashtag appears across all posts.
hashtags = {}
for post in data['GraphImages']:
    # Posts with no 'tags' entry, or an empty one, contribute nothing.
    for tag in post.get('tags') or ():
        hashtags[tag] = hashtags.get(tag, 0) + 1

# Ascending by count, so the most frequent hashtags sit at the tail.
sorted_x = sorted(hashtags.items(), key=operator.itemgetter(1))
# A negative-index slice returns the whole list when fewer than five hashtags
# exist. The earlier len()-based start went negative in that case and
# silently dropped entries.
sorted_x[-5:]
import json
from wordcloud import WordCloud, STOPWORDS
# Load bjp4india.json from the current working directory.
path = os.getcwd()
path += '/bjp4india.json'
# JSON text is UTF-8; without an explicit encoding open() falls back to the
# platform locale and can fail on, or garble, non-ASCII text.
with open(path, 'r', encoding='utf-8') as f:
    # Read the whole file in one call instead of concatenating line by line.
    tweets = f.read()
data = json.loads(tweets)
# Number of posts whose 'is_video' flag is truthy.
count = sum(1 for post in data['GraphImages'] if post['is_video'])
def showPieChart(photoTweet, rest, labels=('Photo in Tweet', 'Other'),
                 colors=('orange', 'coral')):
    """Draw a two-slice pie chart and display it.

    Args:
        photoTweet: Size of the first slice.
        rest: Size of the second slice.
        labels: Captions for the first and second slice, in that order. The
            default keeps the captions this function always used; callers
            charting something else can pass their own.
        colors: Fill colours for the first and second slice, in that order.
    """
    sizes = [photoTweet, rest]
    # Offset both wedges slightly from the centre.
    explode = (0.012, 0.012)
    _, ax = plt.subplots()
    ax.pie(sizes, explode=explode, colors=list(colors), labels=list(labels),
           autopct='%1.1f%%', shadow=True, startangle=120)
    # Equal axis scaling so the pie is drawn as a circle.
    ax.axis('equal')
    plt.show()
# Chart the video-post count against the remaining posts.
showPieChart(count, len(data['GraphImages'])-count)

# One row per post: like count, comment count, numeric post id and caption.
df = []
for post in data['GraphImages']:
    caption_edges = post['edge_media_to_caption']['edges']
    # An empty 'edges' list (presumably a post without a caption) gets an
    # empty caption instead of failing with IndexError on edges[0].
    text = caption_edges[0]['node']['text'] if caption_edges else ''
    df.append([
        int(post['edge_media_preview_like']['count']),
        int(post['edge_media_to_comment']['count']),
        int(post['id']),
        text,
    ])
df = pd.DataFrame(df, columns=['likes', 'comments', 'id', 'post'])
# Highest like count first; comment count (also descending) breaks ties.
newdf = df.sort_values(['likes', 'comments'], ascending=[False, False])
newdf.head()
# Captions from the first 200 rows of the sorted frame.
tweets = newdf['post'].iloc[:200]
# NOTE(review): this extended stop-word set is built here but is not passed
# to wordcloud_plot below - confirm whether it was meant to be.
stopwords = set(STOPWORDS) | {'Nan', 'Unknown', 'https', 'co', '@', 'shri'}
# Clean every caption before handing the list to the word-cloud helper.
cleanTweets = [clean_tweet(str(caption)) for caption in tweets]
wordcloud_plot(cleanTweets, 'white')
The most frequently occurring words are:
# Count how many times each hashtag appears across all posts.
hashtags = {}
for post in data['GraphImages']:
    # Posts with no 'tags' entry, or an empty one, contribute nothing.
    for tag in post.get('tags') or ():
        hashtags[tag] = hashtags.get(tag, 0) + 1

# One entry per distinct string hashtag. The loop variable was previously
# named `data`, which overwrote the loaded JSON payload of the same name.
hashtagCount = {}
for tag in hashtags:
    if not isinstance(tag, str):
        continue
    hashtagCount[tag] = hashtagCount.get(tag, 0) + 1

# Ascending by count, so the most frequent hashtags sit at the tail.
sorted_x = sorted(hashtags.items(), key=operator.itemgetter(1))
print("Top 5 Hashtags : ")
# A negative-index slice returns the whole list when fewer than five hashtags
# exist. The earlier len()-based start went negative in that case and
# silently dropped entries.
sorted_x[-5:]
# Load explore_2019-04-14-05-00-26.json from the current working directory.
path = os.getcwd()
path += '/explore_2019-04-14-05-00-26.json'
# JSON text is UTF-8; without an explicit encoding open() falls back to the
# platform locale and can fail on, or garble, non-ASCII text.
with open(path, 'r', encoding='utf-8') as f:
    # Read the whole file in one call instead of concatenating line by line.
    tweets = f.read()
# The document's top level is indexed with [0]; only that first element is used.
data = json.loads(tweets)[0]
# One row per post: caption, like count, view count, comment count and URL.
rows = [
    [
        post['caption'],
        int(post['likes']['count']),
        int(post['views']),
        int(post['comments']['count']),
        post['url'],
    ]
    for post in data['posts']
]
df = pd.DataFrame(rows, columns=['caption', 'likes', 'views', 'comments', 'url'])
df.head()

# Order by views, then likes, then comments, all descending.
newdf = df.sort_values(by=['views', 'likes', 'comments'], ascending=False)
newdf.head()

# URLs of the first five rows of the sorted frame.
urls = newdf['url'].iloc[:5]
Image("screenshot/insta-explore1.png")
Image("screenshot/insta-explore2.png")
Image("screenshot/insta-explore3.png")
Image("screenshot/insta-explore4.png")
Image("screenshot/insta-explore5.png")
# Number of posts whose 'imgs' list holds more than one entry.
count = sum(1 for post in data['posts'] if len(post['imgs']) > 1)
count
# Chart that count against the remaining posts.
showPieChart(count, len(data['posts']) - count)